
clear all

* Purpose of this do-file: merge population and election data at the
* county-sldl level.

cd "$OutputPath/"
use "county_sldl_pop", clear    // a county-sldl dataset

* statastates is a user-written command, called here on fstate.
* Only the rows it marks as matched (_merge equal to 3) are retained.
statastates, f(fstate)
keep if _merge == 3
drop _merge

* The state filters further down compare against proper-case names
* (for example "New Jersey"), so store the name as state in proper case.
rename state_name state
replace state = strproper(state)

* Snapshot of the labelled population data, reloaded later in the script.
tempfile base
save `base', replace

// Use 2010 maps for states that did not participate in 2000 but submitted usable
// pre-2010 plans (and match them with turnout data).
//   - Iowa and Missouri: replace the post-2000 map with the pre-2010 map; matches 2004 elections
//   - Alaska: replace the 2007 map with the 2012 map; supreme court intervened
//
// The affected states are listed exactly once and folded into a single logical
// condition, so the keep step (take the 2012 rows) and the drop step (remove the
// 2007 rows) always act on the same set of states.
local is_swap_state 0
foreach nm in "Alaska" "Arkansas" "California" "Florida" "Hawaii" "Kentucky" "Maine" ///
	"Pennsylvania" "Tennessee" "Wisconsin" "Maryland" "Minnesota" "Montana" ///
	"Nebraska" "New Jersey" "New Hampshire" "Oregon" "Vermont" "Iowa" "Missouri" {
	// Grow the condition by one more "or state equals this name" term.
	local is_swap_state `is_swap_state' | state=="`nm'"
}

// Step 1: take the 2012 rows of those states and relabel them as the 2007 / census-2000 rows.
keep if (`is_swap_state') & year==2012

replace censusyear = 2000
replace year = 2007

tempfile replace2000
save `replace2000', replace

// Step 2: go back to the full data, remove the original 2007 rows of those states,
// and put the relabelled rows in their place.
use `base', clear

drop if (`is_swap_state') & year==2007

append using `replace2000'

// Merge with sldl codes that correspond to elections_vtd.do data.
// This is deliberately a plain double-slash comment: a triple-slash leader is a
// line-continuation marker and would splice the next command onto the comment line.
gen sldlname = sldl    // copy of sldl taken before the do-file below recodes it
do "$CodePath/elections_csld_edits/elections_add_sldlcodes.do"    // replace string districts with code
destring sldl, replace

// destring leaves the variable as a string when any value is still non-numeric.
// Stop here with a clear error in that case, because the merges and the numeric
// comparisons on sldl later in this script need a numeric variable.
confirm numeric variable sldl

tempfile base
save `base', replace

cd "$OutputPath/"
use "elections_csldl.dta", clear
append using "elections_csldl_2014.dta"

* The election returns are the master data; the population file held in the
* base tempfile is the using data.
merge 1:1 year fstate fcounty sldl using `base', gen(m_elec)

/* Evaluating the merge (reference notes, not executed)

   m_elec=2: because of availability of turnout data (improves over time) or
             because of an uncontested election (treated as missing if no
             candidates on ballot).
   m_elec=1: there is turnout data and no corresponding area in the Census.
             1.3% of observations. Some errors (sldl==0), some absentee votes,
             which are not assigned to a county (fcounty==.).

   gen true_error=m_elec==1 & fcounty!=. & sldl!=. & sldl!=0
       (0.44% of observations)

   egen error_rate=mean(true_error), by(fstate year)
       (90th percentile is 0%, 95th percentile is 0.7% errors, 99th percentile is 5%)
*/

* Discard election-only rows, i.e. rows with no matching population record.
keep if m_elec != 1

save `base', replace

// Add the state legislators data: it is the master file, and the merged
// population/election rows are attached to it many-to-one.
use "statelegislators", clear

merge 1:m fstate sldl year using `base', gen(m_leg)

	* NOTE(review): these two notes originally named m_elec; since the merge above
	* generates m_leg, they presumably describe m_leg -- confirm.
	* m_leg=2 because of uncontested elections
	* no observations of m_leg=1

// Harmonize state-level variables: overwrite each one with its within-state mean,
// so every row of a state carries the same value.
foreach statevar of varlist totaldistricts totaldistricts_senate termlength {

	tempvar state_mean
	egen `state_mean' = mean(`statevar'), by(state)
	replace `statevar' = `state_mean'
	drop `state_mean'
}

// Correct nmember (members per district).

// Every state outside this list is set to one member per district.
// NOTE(review): Washington is not in this list, so it is first set to 1 here and
// then to 2 by the next statement; the final value is 2.
replace nmember = 1 if !inlist(state, "Arizona", "Idaho", "Maryland", "New Hampshire", "New Jersey") ///
	& !inlist(state, "North Dakota", "South Dakota", "Vermont", "West Virginia")

// States set to two members per district.
replace nmember = 2 if inlist(state, "Arizona", "Idaho", "North Dakota", "Washington")

// New Hampshire: district-specific values, using info from
// http://www.gencourt.state.nh.us/house/members/
// NOTE(review): codes 6 and 7 are the only ones below 100 in these lists -- confirm
// they are intended and not truncated codes.
local nh state=="New Hampshire"

replace nmember = 1 if `nh' & inlist(sldl, 7, 206, 207, 208, 210, 213, 405, 406, 407, 410, 411, ///
	715, 716, 722, 723, 725, 726, 727, 728, 729, 908)
replace nmember = 2 if `nh' & inlist(sldl, 6, 209, 211, 212, 409, 512, 513, 514, 515, 516, 517, ///
	518, 519, 520, 524, 525, 526, 527, 719, 724)
replace nmember = 3 if `nh' & inlist(sldl, 408, 522, 528, 529, 530, 531, 532, 533, 534, 535, 536, ///
	717, 720)
replace nmember = 4 if `nh' & inlist(sldl, 412, 413, 523, 714, 718, 721)
replace nmember = 8 if `nh' & sldl == 521
replace nmember = 9 if `nh' & sldl == 537


// Write the final county-sldl population + election dataset.
cd "$OutputPath/"
save "county_sldl_pop_elec", replace
